import requests,re,os
import subprocess

# Target playlist (m3u8): New Concept English, Book 2, Lesson 4.
url = 'https://pl-ali.youku.com/playlist/m3u8?vid=XNTk1MDg2NzA4&type=mp4&ups_client_netip=6a2f8ecb&utid=vt4oGMH5%2BRYCASp6MzAigVNM&ccode=0502&psid=fee1d75c42219b85fb8207a87043cfa243346&app_ver=2.1.67&duration=836&expire=18000&drm_type=1&drm_device=7&nt=1&play_ability=16782592&media_type=standard,subtitle&dyt=1&btf=&rid=200000003928847DD7F8F0A151EC81A8104E70E602000000&ups_ts=1619786574&onOff=0&encr=0&ups_key=2169d05b60c90f3a87e3e8db72455f1a'

# Request headers sent with every HTTP call (browser-like User-Agent).
# NOTE: the backslash continuation sits inside the string literal, so the
# indentation of the second line is part of the header value.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) \
            AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36',
           }

# Captures everything between "http://" and "ts_keyframe=1" on a playlist
# line; whatever follows "ts_keyframe=1" on that line is dropped.
pattern = re.compile(r'http://(.+?)ts_keyframe=1.*')
prefix = 'http://'
postfix = 'ts_keyframe=1'

# Folder (inside the working directory) and file name of the merged video.
output_dir_name = '新概念英语'
output_file_name = '第4课.mp4'


def extract_links(playlist_text):
    """Return the segment URLs found in *playlist_text*, in playlist order.

    Each URL is rebuilt as ``prefix + captured part + postfix``, so it always
    ends with ``ts_keyframe=1``.
    """
    return [prefix + item + postfix for item in pattern.findall(playlist_text)]


def download_segments(links):
    """Download every link to ``<index>.mp4`` in the working directory.

    Returns the list of file names written, in download (playlist) order.
    Raises ``requests.HTTPError`` if a segment request returns an error
    status, so an error page is never saved as video data.
    """
    segments = []
    for index, link in enumerate(links):
        response = requests.get(link, headers=headers)
        response.raise_for_status()
        segment = str(index) + '.mp4'
        # The context manager flushes and closes the file before ffmpeg
        # reads it.
        with open(segment, 'wb') as handle:
            handle.write(response.content)
        segments.append(segment)
    return segments


def ts_name(segment):
    """Return the ``.ts`` file name that corresponds to *segment*."""
    return os.path.splitext(segment)[0] + '.ts'


def build_convert_cmd(segment, ts_file):
    """Return the ffmpeg argument list that remuxes *segment* into *ts_file*.

    An argument list (not a command string) is used so the call works
    without a shell and file names containing spaces are passed intact.
    ``-bsf:v`` replaces the deprecated ``-vbsf`` alias.
    """
    return ['ffmpeg', '-i', segment,
            '-vcodec', 'copy', '-acodec', 'copy',
            '-bsf:v', 'h264_mp4toannexb', ts_file]


def build_concat_input(ts_files):
    """Return the ffmpeg ``concat:`` protocol input for *ts_files*."""
    return 'concat:' + '|'.join(ts_files)


def build_merge_cmd(ts_files, output):
    """Return the ffmpeg argument list that joins *ts_files* into *output*.

    ``-bsf:a`` replaces the deprecated ``-absf`` alias.
    """
    return ['ffmpeg', '-i', build_concat_input(ts_files),
            '-acodec', 'copy', '-vcodec', 'copy',
            '-bsf:a', 'aac_adtstoasc', output]


def output_path(base_dir):
    """Return the path of the merged video below *base_dir*."""
    return os.path.join(base_dir, output_dir_name, output_file_name)


def convert_segments(segments):
    """Convert each downloaded segment to a ``.ts`` file, preserving order.

    Only the given segments are converted (not every file in the directory).
    Returns the list of ``.ts`` file names. Raises
    ``subprocess.CalledProcessError`` if ffmpeg exits with a non-zero status.
    """
    ts_files = []
    for segment in segments:
        ts_file = ts_name(segment)
        subprocess.check_call(build_convert_cmd(segment, ts_file))
        ts_files.append(ts_file)
    return ts_files


def remove_files(paths):
    """Delete the given files, ignoring ones that are already gone."""
    for file_path in paths:
        try:
            os.remove(file_path)
        except FileNotFoundError:
            pass


def main():
    """Download the lesson's segments, merge them with ffmpeg, and clean up."""
    # Collect the individual segment links from the playlist.
    response = requests.get(url, headers=headers)
    response.raise_for_status()
    links = extract_links(response.text)
    if not links:
        raise SystemExit('No segment links found in the playlist response.')

    # Fetch the segments, then convert each one to a transport stream.
    segments = download_segments(links)
    ts_files = convert_segments(segments)

    # Merge the transport streams into one mp4 inside the output folder.
    output = output_path(os.getcwd())
    os.makedirs(os.path.dirname(output), exist_ok=True)
    subprocess.check_call(build_merge_cmd(ts_files, output))

    # Remove only the intermediate files created by this run. This point is
    # reached only after a successful merge, so a failure keeps the downloads.
    remove_files(segments + ts_files)

    print("恭喜完成。。。。。")


if __name__ == "__main__":
    main()